library(tidyverse)
library(nFactors)

# 1) Data Processing ####

respid <- readRDS("respid.RDS")

oD <- readstata13::read.dta13("ACPP_data.dta") %>%
  dplyr::select(contains("W20_") ,contains("W21_"), "RESPID", contains("SD_"))

w20 <- oD %>% dplyr::select("RESPID", contains("W20_") & contains("Q53"))  %>%
  mutate(pop1 = factor(W20_Q53A1,
                       levels = c("1 = Trifft voll und ganz zu", "2 = Trifft eher zu",
                                  "3 = Teils-teils", "4 = Trifft eher nicht zu",
                                  "5 = Trifft gar nicht zu"), ordered = T),
         pop2 = factor(W20_Q53A2,
                       levels = c("1 = Trifft voll und ganz zu", "2 = Trifft eher zu",
                                  "3 = Teils-teils", "4 = Trifft eher nicht zu",
                                  "5 = Trifft gar nicht zu"), ordered = T),
         pop3 = factor(W20_Q53A3,
                       levels = c("1 = Trifft voll und ganz zu", "2 = Trifft eher zu",
                                  "3 = Teils-teils", "4 = Trifft eher nicht zu",
                                  "5 = Trifft gar nicht zu"), ordered = T),
         pop4 = factor(W20_Q53A4,
                       levels = c("1 = Trifft voll und ganz zu", "2 = Trifft eher zu",
                                  "3 = Teils-teils", "4 = Trifft eher nicht zu",
                                  "5 = Trifft gar nicht zu"), ordered = T),
         pop5 = factor(W20_Q53A5,
                       levels = c("1 = Trifft voll und ganz zu", "2 = Trifft eher zu",
                                  "3 = Teils-teils", "4 = Trifft eher nicht zu",
                                  "5 = Trifft gar nicht zu"), ordered = T),
         pop6 = factor(W20_Q53A6,
                       levels = c("1 = Trifft voll und ganz zu", "2 = Trifft eher zu",
                                  "3 = Teils-teils", "4 = Trifft eher nicht zu",
                                  "5 = Trifft gar nicht zu"), ordered = T)) %>%
  dplyr::select("RESPID", contains("pop")) %>%
  drop_na()

w21 <- oD %>% dplyr::select("RESPID", contains("W21_") & contains("Q59"))   %>%
  mutate(scp1 = factor(W21_Q59A1,
                       levels = c("1 = Trifft voll und ganz zu", "2 = Trifft eher zu",
                                  "3 = Teils-teils", "4 = Trifft eher nicht zu",
                                  "5 = Trifft gar nicht zu"), ordered = T),
         scp2 = factor(W21_Q59A2,
                       levels = c("1 = Trifft voll und ganz zu", "2 = Trifft eher zu",
                                  "3 = Teils-teils", "4 = Trifft eher nicht zu",
                                  "5 = Trifft gar nicht zu"), ordered = T),
         scp3 = factor(W21_Q59A3,
                       levels = c("1 = Trifft voll und ganz zu", "2 = Trifft eher zu",
                                  "3 = Teils-teils", "4 = Trifft eher nicht zu",
                                  "5 = Trifft gar nicht zu"), ordered = T),
         scp4 = factor(W21_Q59A4,
                       levels = c("1 = Trifft voll und ganz zu", "2 = Trifft eher zu",
                                  "3 = Teils-teils", "4 = Trifft eher nicht zu",
                                  "5 = Trifft gar nicht zu"), ordered = T),
         scp5 = factor(W21_Q59A5,
                       levels = c("1 = Trifft voll und ganz zu", "2 = Trifft eher zu",
                                  "3 = Teils-teils", "4 = Trifft eher nicht zu",
                                  "5 = Trifft gar nicht zu"), ordered = T),
         scp6 = factor(W21_Q59A6,
                       levels = c("1 = Trifft voll und ganz zu", "2 = Trifft eher zu",
                                  "3 = Teils-teils", "4 = Trifft eher nicht zu",
                                  "5 = Trifft gar nicht zu"), ordered = T),
         scp7 = factor(W21_Q59A7,
                       levels = c("1 = Trifft voll und ganz zu", "2 = Trifft eher zu",
                                  "3 = Teils-teils", "4 = Trifft eher nicht zu",
                                  "5 = Trifft gar nicht zu"), ordered = T),
         scp8 = factor(W21_Q59A8,
                       levels = c("1 = Trifft voll und ganz zu", "2 = Trifft eher zu",
                                  "3 = Teils-teils", "4 = Trifft eher nicht zu",
                                  "5 = Trifft gar nicht zu"), ordered = T)) %>%
  dplyr::select("RESPID", contains("scp")) %>%
  drop_na()

# wave 20 

subitems <- paste0("pop", 1:6)

populism <- w20[subitems] %>%
  mutate(pop1 = as.numeric(pop1),
         pop2 = as.numeric(pop2),
         pop3 = as.numeric(pop3),
         pop4 = as.numeric(pop4),
         pop5 = as.numeric(pop5),
         pop6 = as.numeric(pop6))


ev <- eigen(cor(populism)) # get eigenvalues
ap <- nFactors::parallel(subject=nrow(populism),var=ncol(populism),
                         rep=100,cent=.05)
nS <- nFactors::nScree(x=ev$values, aparallel=ap$eigen$qevpea)
nFactors::plotnScree(nS)

#correlation matrix 
corMat <- cor(populism)
#oblique principal-axis exploratory factor analysis:
solution <- psych::fa(r = corMat, nfactors = 1,
                      rotate = "promax", fm = "pa",
                      SMC=FALSE) 

print(solution)

CFA_pop <- '
  populism_w20 =~ pop1 + pop2 + pop3 + pop4 + pop5 + pop6
'

fit_w20 <- lavaan::cfa(CFA_pop,
                       data = w20,
                       estimator = "WLSMV")

round(lavaan::fitmeasures(fit_w20)[c("chisq", "pvalue", "df", "cfi", "rmsea", "srmr")],3)

lavaan::inspect(fit_w20, what="std")$lambda

# science populism 

w21_scree <- w21 %>%
  mutate(scp1 = as.numeric(scp1),
         scp2 = as.numeric(scp2),
         scp3 = as.numeric(scp3),
         scp4 = as.numeric(scp4),
         scp5 = as.numeric(scp5),
         scp6 = as.numeric(scp6),
         scp7 = as.numeric(scp7),
         scp8 = as.numeric(scp8))

ev <- eigen(cor(w21_scree %>% select(-"RESPID"))) # get eigenvalues
ap <- parallel(subject=nrow(w21_scree),var=ncol(w21_scree)-1,
               rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS) 


CFA_scp <- '
  science_populism =~ scp1 + scp2 + scp3 + scp4 + scp5 + scp6 + scp7 + scp8
  
  scp1 ~~ scp2
  scp5 ~~ scp6
  #covariations based on modindices
'

fit_w21 <- lavaan::cfa(CFA_scp,
                       data = w21,
                       estimator = "WLSMV")

round(lavaan::fitmeasures(fit_w21)[c("chisq", "pvalue", "df", "cfi", "rmsea", "srmr")],3)

lavaan::inspect(fit_w21, what="std")$lambda

lavaan::modificationindices(fit_w21, sort = T, minimum.value = 3.85)


CFA_scp4 <- '
  ordinary =~ scp1 + scp2 
  academic =~  scp3 + scp4
  decision =~ scp5 + scp6 
  truth =~ scp7 + scp8

'

fit_w21_4 <- lavaan::cfa(CFA_scp4,
                         data = w21,
                         estimator = "WLSMV")

round(lavaan::fitmeasures(fit_w21_4)[c("chisq", "pvalue", "df", "cfi", "rmsea", "srmr")],3)

lavaan::inspect(fit_w21_4, what="std")$lambda

#output from laavan

out_w20 <- cbind(w20, lavaan::predict(fit_w20))

out_w21 <- cbind(w21, lavaan::predict(fit_w21), lavaan::predict(fit_w21_4))

df_fa <- dplyr::left_join(oD %>% 
                         dplyr::select(RESPID),
                       out_w20 %>%
                     dplyr::select(RESPID, contains("pop")),
                   by = "RESPID") %>%
  dplyr::left_join(., out_w21,
                   by = "RESPID") #%>%
  #drop_na()

df <- dplyr::left_join(oD %>% 
                         dplyr::select(RESPID),
                       out_w20 %>%
                     dplyr::select(RESPID, populism_w20),
                   by = "RESPID") %>%
  dplyr::left_join(., out_w21 %>%
                     dplyr::select(-contains("scp")),
                   by = "RESPID") %>%
  dplyr::select(RESPID, populism_w20, science_populism:truth) %>%
  #drop_na() %>%
  mutate(populism_w20 = populism_w20 * -1,
         science_populism = science_populism * -1,
         ordinary = ordinary * -1,
         academic = academic * -1,
         decision = decision * -1,
         truth = truth * -1)

# Table 1 & Figure 1 ####

df_fa <- df_fa %>%
  filter(RESPID %in% respid$RESPID) %>%
  select(contains("pop"), contains("scp"), -populism_w20, -science_populism) %>%
  drop_na() %>%
  mutate_all(funs(as.numeric(.)))

ev <- eigen(cor(df_fa)) # get eigenvalues
ap <- nFactors::parallel(subject=nrow(df_fa),var=ncol(df_fa),
                         rep=100,cent=.05)
nS <- nFactors::nScree(x=ev$values, aparallel=ap$eigen$qevpea)
nFactors::plotnScree(nS)

#correlation matrix 
corMat <- cor(df_fa)
#oblique principal-axis exploratory factor analysis:
solution <- psych::fa(r = corMat, nfactors = 2,
                      rotate = "promax", fm = "pa",
                      SMC=FALSE) 

print(solution, digits = 2, cut = 0)
# This line proides the output for Table 1

as.data.frame(nS$Analysis) %>%
  select(Eigenvalues, Par.Analysis) %>%
  mutate(ndim = 1:nrow(.)) %>%
  gather(Eigenvalues:Par.Analysis, key = "metric", value = "value") %>%
  ggplot(., aes(x=ndim, y = value, colour = metric, shape = metric)) + 
  geom_hline(yintercept = 1, lty = "dotted") +
  geom_line() +
  geom_point() + 
  theme_classic() +
  labs(x = "Components", y = "Eigenvalues") + 
  scale_colour_discrete("Metric:") + 
  scale_shape_manual("Metric:", values = c(16,2)) + 
  scale_y_continuous(breaks = c(1:7)) + 
  scale_x_continuous(breaks = c(1:15)) + 
  theme(legend.position = "bottom")

ggsave("./figures/Figure1.png",
       width = 15,
       height = 7,
       units = "cm")

# These lines provide more evidence for the two-factor solution.
#We discuss this below figure 1

solution <- psych::fa(r = corMat, nfactors = 3,
                      rotate = "promax", fm = "pa",
                      SMC=FALSE) 

print(solution, digits = 2, cut = 0.2)

solution <- psych::fa(r = corMat, nfactors = 4,
                      rotate = "promax", fm = "pa",
                      SMC=FALSE) 

print(solution, digits = 2, cut = 0.2)

solution <- psych::fa(r = corMat, nfactors = 5,
                      rotate = "promax", fm = "pa",
                      SMC=FALSE) 

print(solution, digits = 2, cut = 0.2)

# Figure 2 ####


df <- dplyr::left_join(df, oD %>%
                         dplyr::select(RESPID,
                                       left_right = W20_Q58,
                                       trust_sci = W21_Q58A8,
                                       trust_gov = W21_Q58A5,
                                       trust_EU = W21_Q58A7,
                                       trust_me = W21_Q58A1,
                                       sat_dem = W21_Q60,
                                       gender = SD_GENDER,
                                       edu = SD_EDU,
                                       birthyear = SD_BIRTHYEAR,
                                       econ = W21_Q86,
                                       pol_int = W20_Q48,
                                       ptv_fpoe = W21_Q107A3,
                                       con_cov1 = W21_Q96A1,
                                       con_cov2 = W21_Q96A2,
                                       con_cov3 = W21_Q96A3,
                                       #con_cov4 = W21_Q96A4,
                                       con_cov5 = W21_Q96A5,
                                       con_cov6 = W21_Q96A6,
                                       con_soro = W21_Q96A7,
                                       con_clim = W21_Q96A8,
                                       con_secr = W21_Q96A9,
                                       experts = W20_Q52A1,
                                       vote = W20_Q98,
                                       imm = W20_Q59A7) %>%
                         mutate(left_right_num = as.numeric(left_right)-1,
                                left_right_num = ifelse(left_right_num > 10, NA, left_right_num),
                                trust_sci_num = as.numeric(trust_sci)-1,
                                trust_sci_num = ifelse(trust_sci_num > 10, NA, trust_sci_num),
                                trust_gov_num = as.numeric(trust_gov)-1,
                                trust_gov_num = ifelse(trust_gov_num > 10, NA, trust_gov_num),
                                trust_me_num = as.numeric(trust_me)-1,
                                trust_me_num = ifelse(trust_me_num > 10, NA, trust_me_num),
                                trust_EU_num = as.numeric(trust_EU)-1,
                                trust_EU_num = ifelse(trust_EU_num > 10, NA, trust_EU_num),
                                sat_dem = as.numeric(sat_dem),
                                sat_dem = sat_dem * -1 +  6,
                                age = 2021 - birthyear,
                                gender = factor(gender,
                                                level = c("Maennlich", "Weiblich"),
                                                labels = c("M", "W")),
                                edu3 = factor(ifelse(edu %in% c("Volksschule oder weniger",
                                                                "Hauptschule oder AHS Unterstufe"), "Low",
                                                     ifelse(edu %in% c("Polytechnikum, BMS (Fachschule, z.B. HASCH)",
                                                                       "Lehre, Berufsschule"), "Medium",
                                                            ifelse(is.na(edu), NA,
                                                                   ifelse(edu == "keine Angabe", NA,"High")))),
                                              levels = c("Medium", "Low", "High")),
                                econ = ifelse(as.numeric(econ) > 5, NA, as.numeric(econ)),
                                econ = econ * -1 + 5,
                                pol_int = ifelse(as.numeric(pol_int) > 4, NA, as.numeric(pol_int)),
                                pol_int = pol_int * -1 + 5,
                                ptv_fpoe = ifelse(as.numeric(ptv_fpoe) >11, NA, as.numeric(ptv_fpoe)-1),
                                vote_fpoe = ifelse(vote == "fpOe", 1, 0), 
                                con_cov1 = as.numeric(con_cov1),
                                con_cov2 = as.numeric(con_cov2)*-1 + 6,
                                con_cov3 = as.numeric(con_cov3),
                                #con_cov4 = as.numeric(con_cov4),
                                con_cov5 = as.numeric(con_cov5),
                                con_cov6 = as.numeric(con_cov6),
                                con_cov = (con_cov1 + con_cov2 + con_cov3 + con_cov5 + con_cov6)/6,
                                gen_cons = (as.numeric(con_secr) + as.numeric(con_clim) + as.numeric(con_soro))/3,
                                con_clim = as.numeric(con_clim)*-1 + 6,
                                experts = ifelse(as.numeric(experts) > 5, NA, as.numeric(experts)* -1 + 6),
                                imm = as.numeric(imm)*-1 + 6,
                                imm = ifelse(imm < 1, NA, imm)),
                       by = "RESPID")

var_list <- c("science_populism", "populism_w20",
              "trust_gov_num", "trust_sci_num", "experts", "con_cov",
              "age", "left_right_num", "econ", "pol_int")

df_stand <- df %>%
  select(all_of(var_list), "gender", edu3) %>%
  mutate_at(vars(var_list), .funs = function(x){scales::rescale(x, to = c(0, 1), na.rm = T)}) %>%
  drop_na()

saveRDS(respid <- df %>%
          select(var_list, "gender", edu3, "RESPID") %>%
          drop_na() %>%
          select("RESPID"), "respid.RDS") #complete observations

cor <- cor.test(df_stand$science_populism, df_stand$populism_w20)

ggplot(df_stand, aes(x=populism_w20, y = science_populism)) +
  geom_hline(yintercept = mean(df_stand$science_populism), lty = "dotted") +
  geom_vline(xintercept = mean(df_stand$populism_w20), lty = "dotted") + 
  ggtext::geom_textbox(x = .3, y = .9, label = paste0("r (",cor$parameter, ") = ", round(cor$estimate,2),", p < 0.001")) +
  geom_point(alpha = .25) +
  theme_minimal() +
  labs(x = "Political Populism", y = "Science Populism") +
  annotate("text", x = 0.05, y = 0.05, label = "PP-Lo\nSP-Lo") +
  annotate("text", x = 0.05, y = 0.9, label = "PP-Lo\nSP-Hi") +
  annotate("text", x = 0.95, y = 0.05, label = "PP-Hi\nSP-Lo") +
  annotate("text", x = 0.95, y = 0.9, label = "PP-Hi\nSP-Hi") +
  NULL

ggsave("./figures/Figure2.png",
       width = 17,
       height = 8,
       units = "cm")

#Table 2 ####

covariates <- "age + gender + edu3 + left_right_num + I(left_right_num^2) + econ + pol_int"

m_pop <- lm(paste0("populism_w20 ~ ", covariates),
            data = df_stand)

summary(m_pop)

prediction::prediction_summary(m_pop, variables = "left_right_num", at = list(left_right_num = seq(0,1, by = .1))) %>%
  rename("left_right_num" = 'at(left_right_num)') %>%
  ggplot(., aes(x = left_right_num, y = Prediction, ymin = lower, ymax = upper)) +
  geom_pointrange() +
  theme_minimal() + 
  labs(x = "Left-Right Self-Placement", y = "Predicted Political Populism")

ggsave("./figures/FigureA1.png", width = 15, height = 9, unit = "cm")

m_scp <- lm(paste0("science_populism ~ ", covariates),
            data = df_stand)

summary(m_scp)

prediction::prediction_summary(m_scp, variables = "left_right_num", at = list(left_right_num = seq(0,1, by = .1))) %>%
  rename("left_right_num" = 'at(left_right_num)') %>%
  ggplot(., aes(x = left_right_num, y = Prediction, ymin = lower, ymax = upper)) +
  geom_pointrange() +
  theme_minimal() + 
  labs(x = "Left-Right Self-Placement", y = "Predicted Science Populism")

ggsave("./figures/FigureA2.png", width = 15, height = 9, unit = "cm")

df_stand$diff <- df_stand$populism_w20 - df_stand$science_populism

m_diff <- lm(paste0("diff ~ ", covariates),
             data = df_stand)

summary(m_diff)

texreg::htmlreg(list(m_pop, m_scp, m_diff),
                file = "./tables/Table2.html", 
                single.row = T, 
                custom.model.names = c("Political Populism (PP)", "Science Populism (SP)", "Diff. PP/SP"),
                custom.coef.names = c(NA, "Age", "Female",
                                      "Education -- Low", "Education -- Higher",
                                      "Left-Right Self-Placement", "Left-Right Self-Placement sq",
                                      "Financial Situation", "Political Interest"),
                digits = 2,
                leading.zero = T, caption = "",
                custom.note = "%stars. Entries are unstandardised coefficients from an OLS regression. Standard errors in brackets. Both dependent variables are extracted from a confirmatory factor analysis. All variables are standardised between 0 and 1.")

# Table 3  and Table A1 ####

m1A <- lm(paste0("trust_gov_num ~ populism_w20 + ", covariates),
          data = df_stand)

m2A <- lm(paste0("trust_gov_num ~ science_populism + ", covariates),
                data = df_stand)

m3A <- lm(paste0("trust_gov_num ~ populism_w20 + science_populism + ", covariates),
                data = df_stand)

texreg::screenreg(list(m1A, m2A, m3A), stars = .1)

texreg::htmlreg(list(m1A, m2A, m3A),
                file = "./tables/Table3.html",
                single.row = T,
                custom.model.names = c("Political Populism",  "Science Populism", "Both"),
                custom.coef.names = c(NA, "Political Populism",
                                      "Age", "Female",
                                      "Education -- Low", "Education -- Higher",
                                      "Left-Right Self-Placement",
                                      "Left-Right Self-Placement sq",
                                      "Financial Situation", "Political Interest",  "Science Populism"),
                reorder.coef = c(2, 11, 3:10, 1),
                digits = 2,
                leading.zero = T,
                caption = "",
                custom.note = "%stars. Entries are unstandardised coefficients from an OLS regression. Standard errors in brackets. The dependent variable is trust in government, measured on a 11-point scale from 0 (no trust at all) to 10 (complete trust). All variables are standardised between 0 and 1.")


#Table 4 and Table A2 ####

m1B <- lm(paste0("trust_sci_num ~ populism_w20 + ", covariates),
          data = df_stand)

m2B <- lm(paste0("trust_sci_num ~ science_populism + ", covariates),
          data = df_stand)

m3B <- lm(paste0("trust_sci_num ~ science_populism +  populism_w20 + ", covariates),
          data = df_stand)

texreg::htmlreg(list(m1B, m2B, m3B), 
                file = "./tables/Table4.html",
                single.row = T,
                custom.model.names = c("Political Populism",  "Science Populism", "Both"),
                custom.coef.names = c(NA, "Political Populism",
                                      "Age", "Female",
                                      "Education -- Low", "Education -- Higher",
                                      "Left-Right Self-Placement",
                                      "Left-Right Self-Placement sq",
                                      "Financial Situation", "Political Interest",  "Science Populism"),
                reorder.coef = c(2, 11, 3:10, 1),
                digits = 2,
                leading.zero = T,
                caption = "",
                custom.note = "%stars. Entries are unstandardised coefficients from an OLS regression. Standard errors in brackets. The dependent variable is trust in science, measured on a 11-point scale from 0 (no trust at all) to 10 (complete trust). All variables are standardised between 0 and 1.")

#Table 5 and Table A3 ####

m1C <- lm(paste0("experts ~ populism_w20 + ", covariates),
          data = df_stand)

m2C <- lm(paste0("experts ~ science_populism + ", covariates),
          data = df_stand)

m3C <- lm(paste0("experts ~ populism_w20 + science_populism + ", covariates),
          data = df_stand)

texreg::htmlreg(list(m1C, m2C, m3C), 
                file = "./tables/Table5.html",
                single.row = T,
                custom.model.names = c("Political Populism",  "Science Populism", "Both"),
                custom.coef.names = c(NA, "Political Populism",
                                      "Age", "Female",
                                      "Education -- Low", "Education -- Higher",
                                      "Left-Right Self-Placement",
                                      "Left-Right Self-Placement sq",
                                      "Financial Situation", "Political Interest",  "Science Populism"),
                reorder.coef = c(2, 11, 3:10, 1),
                digits = 2,
                leading.zero = T,
                caption = "",
                custom.note = "%stars. Entries are unstandardised coefficients from an OLS regression. Standard errors in brackets. The dependent variable is agreement with the statement that 'It is better for important policy decisions to be taken on the basis of scientific evidence by independent experts rather than by elected politicians', measured on a 5-point scale from 1 (completely disagree) to 5 (completely agree). All variables are standardised between 0 and 1.")

#Table 6 and Table A4 ####

m1D <- lm(paste0("con_cov ~ populism_w20 + ", covariates),
          data = df_stand)

m2D <- lm(paste0("con_cov ~ science_populism + ", covariates),
          data = df_stand)

m3D <- lm(paste0("con_cov ~ populism_w20 + science_populism + ", covariates),
          data = df_stand)

texreg::screenreg(list(m1D, m2D, m3D))

texreg::htmlreg(list(m1D, m2D, m3D), 
                file = "./tables/Table6.html",
                single.row = T,
                custom.model.names = c("Political Populism",  "Science Populism", "Both"),
                custom.coef.names = c(NA, "Political Populism",
                                      "Age", "Female",
                                      "Education -- Low", "Education -- Higher",
                                      "Left-Right Self-Placement",
                                      "Left-Right Self-Placement sq",
                                      "Financial Situation", "Political Interest",  "Science Populism"),
                reorder.coef = c(2, 11, 3:10, 1),
                digits = 2,
                leading.zero = T,
                caption = "",
                custom.note = "%stars. Entries are unstandardised coefficients from an OLS regression. Standard errors in brackets. The dependent variable is mean value of agreement with statements that A) COVID-19 is a bioweapon, B) COVID-19 is a natural disease (reversed), C) COVID-19 is a secret US military experiment, D) Bill Gates wants to vaccinate by force to earn money, E) 5G transmitter masts are responsible for COVID-19.  Agreement is measured on a 5-point scale from 1 (Very certain that this is false) to 5 ( Very certain that this is true). All variables are standardised between 0 and 1.")
